package com.zxl.grocery.microservice.crawler.processor;

import com.zxl.grocery.microservice.crawler.common.base.AbstractPageProcessor;
import com.zxl.grocery.microservice.crawler.common.base.Constant;
import com.zxl.grocery.microservice.crawler.entity.CrawlerNews;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;
import java.util.regex.Pattern;

/**
 * Page processor for the Zhihu "Explore" page.
 *
 * <p>Extracts the title and link of each daily feed item found on the explore
 * page and stores one {@link CrawlerNews} per item in the page's result fields.
 *
 * @Author: Lzx
 * @Description: Zhihu Explore
 * @Date: Created in 11:49 2018/1/16
 * @Modified By:
 */
@Component
public class ZhiHuProcessor extends AbstractPageProcessor {
    /** URL of the Zhihu explore page handled by this processor. */
    public static final String list = "https://www.zhihu.com/explore";

    /**
     * {@link #list} quoted as a regex literal, so that characters such as
     * {@code .} in the URL are matched literally instead of as wildcards.
     */
    private static final String LIST_URL_REGEX = Pattern.quote(list);

    /** XPath selecting every feed item inside the "daily" section of the page. */
    private static final String FEED_ITEM_XPATH =
            "//div[@data-type='daily']/div[@class='explore-feed feed-item']";

    /**
     * Returns the URLs this processor targets.
     *
     * @return a single-element array containing {@link #list}
     */
    @Override
    public String[] initTargetUrl() {
        return new String[]{list};
    }

    /**
     * Extracts news entries from the explore page.
     *
     * <p>Pages whose URL does not contain {@link #list} are ignored. For every
     * feed item that yields both a non-empty title and a non-empty link, a
     * {@link CrawlerNews} is stored on the page under the key
     * {@code "news" + title}.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        if (!page.getUrl().regex(LIST_URL_REGEX).match()) {
            return;
        }

        List<Selectable> feedItems = page.getHtml().xpath(FEED_ITEM_XPATH).nodes();
        for (Selectable feedItem : feedItems) {
            String title = feedItem.xpath("//h2/a/text()").toString();
            String link = feedItem.xpath("//h2").links().toString();
            // Skip items that are missing either piece of information.
            if (StringUtils.isEmpty(title) || StringUtils.isEmpty(link)) {
                continue;
            }

            CrawlerNews news = new CrawlerNews();
            news.setTitle(title);
            // The feed item exposes no separate summary here, so the title doubles as the info text.
            news.setInfo(title);
            news.setLink(link);
            news.setTypeId(Constant.Type_ZhiHu);
            news.setSourcesId(Constant.Sources_ZhiHu);
            // NOTE(review): the key is derived from the title, so two items with the
            // same title share a key — presumably the later one replaces the earlier; confirm.
            page.putField("news" + title, news);
        }
    }
}
